library(tidyverse)
library(gt)
library(broom)
library(multiwayvcov)

# Function to calculate 95% CIs by wild bootstrap

wild.boot.ci <- function(model, data) {
  vcov <- cluster.boot(model, cluster = data$fipsstate, boot_type = "wild", R = 500)
  
  sim_coeffs <- mvtnorm::rmvnorm(n = 10000, 
                                 mean = model$coeff,
                                 sigma = vcov) %>% 
    as_tibble()
  
  ci <- quantile(sim_coeffs$z, c(0.025, 0.975)) %>% 
    bind_rows(quantile(sim_coeffs$ZTRUE, c(0.025, 0.975))) %>% 
    bind_rows(quantile(sim_coeffs$Zz, c(0.025, 0.975)))
  return(ci)
}

##### SETUP DATA #####

# `x` is dist
# `z` is expansion
# `Z` is high eligibility (above median)
# `Zx` is high eligibility * dist
# `zx` is expansion * dist
# `Zz` is high eligibility * expansion
# `Zzx` is high eligibility * dist * expansion

with_leip <- read_rds("prepped-data/dataset_leip_dataverse.rds") %>% 
  filter(abs(x) < 100) %>% 
  mutate(`Z` = pcteligible2013,
         `Z` = `Z` >= median(`Z`),
         `Zx` = `Z` * `x`,
         `Zz` = `Z` * `z`,
         `Zzx` = `Z` * `zx`)

##### MODELS #####

# Model for 2014-2010 reg, no covariates
# Calculate CI table

model_1410 <- lm(data = with_leip,
                 formula = dregistration20142010 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + `Zzx`)
ci_1410 <- wild.boot.ci(model_1410, with_leip) %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  mutate(ci = paste("[", `2.5%`, ", ", `97.5%`, "]", sep = "")) %>% 
  select(ci)

# Model for 2014-2010 reg, with covariates
# Calculate CI table
# Left out `Gub2014` because coef is NA

model_1410_covar <- lm(data = with_leip,
                       formula = dregistration20142010 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + 
                         `Zzx` + pctwhite + pct65plus + pcths + lmedianincome + logvap2014 + 
                         demvoteshare2012 + registration2010 + swing2012 + Gub2010 + Sen2010 + 
                         Gub2010Sen2010 + Sen2014 + Gub2014Sen2014)
ci_1410_covar <- wild.boot.ci(model_1410_covar, with_leip) %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  mutate(ci = paste("[", `2.5%`, ", ", `97.5%`, "]", sep = "")) %>% 
  select(ci)

# Model for 2016-2012 reg, no covariates
# Calculate CI table

model_1612 <- lm(data = with_leip,
                 formula = dregistration20162012 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + `Zzx`)
ci_1612 <- wild.boot.ci(model_1612, with_leip) %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  mutate(ci = paste("[", `2.5%`, ", ", `97.5%`, "]", sep = "")) %>% 
  select(ci)

# Model for 2016-2012 reg, with covariates
# Calculate CI table

model_1612_covar <- lm(data = with_leip,
                       formula = dregistration20162012 ~ `z` + `Z` + `Zz` + `x` + `zx` + `Zx` + 
                         `Zzx` + pctwhite + pct65plus + pcths + lmedianincome + logvap2016 + 
                         demvoteshare2012 + registration2012 + swing2012)
ci_1612_covar <- wild.boot.ci(model_1612_covar, with_leip) %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  mutate(ci = paste("[", `2.5%`, ", ", `97.5%`, "]", sep = "")) %>% 
  select(ci)

##### BUILD TABLE #####

coefs <- tidy(model_1410) %>% 
  slice(2:4) %>% 
  select(term, estimate) %>% 
  inner_join((tidy(model_1410_covar) %>% slice(2:4) %>% select(term, estimate)), by = "term") %>% 
  inner_join((tidy(model_1612) %>% slice(2:4) %>% select(term, estimate)), by = "term") %>% 
  inner_join((tidy(model_1612_covar) %>% slice(2:4) %>% select(term, estimate)), by = "term") %>% 
  mutate(across(where(is.numeric), round, 2)) %>% 
  bind_cols(ci_1410, ci_1410_covar, ci_1612, ci_1612_covar) %>% 
  unite("(1)", c(2, 6), sep = "<br>") %>% 
  unite("(2)", c(3, 6), sep = "<br>") %>% 
  unite("(3)", c(4, 6), sep = "<br>") %>% 
  unite("(4)", c(5, 6), sep = "<br>")
with_counties <- rbind(coefs,
                       c("Number of Counties", nobs(model_1410), nobs(model_1410_covar),
                         nobs(model_1612), nobs(model_1612_covar)),
                       c("Window", rep("100 miles", 4)),
                       c("Covariates", "No", "Yes", "No", "Yes"),
                       c("R-squared", round(summary(model_1410)$r.squared, 2), 
                         round(summary(model_1410_covar)$r.squared, 2),
                         round(summary(model_1612)$r.squared, 2),
                         round(summary(model_1612_covar)$r.squared, 2))
                       ) %>% 
  rename(`(1)` = 2, `(2)` = 3, `(3)` = 4, `(4)` = 5)

with_counties %>% 
  gt() %>% 
  fmt_markdown(columns = TRUE) %>% 
  tab_spanner(
    label = "Registration 2014-2010",
    columns = vars(`(1)`, `(2)`)
  ) %>% 
  tab_spanner(
    label = "Registration 2016-2012",
    columns = vars(`(3)`, `(4)`)
  ) %>% 
  cols_label(
    term = ""
  ) %>% 
  tab_header(
    title = "TABLE 2. Effect of Medicaid Expansion on Voter Registration"
  )